% Oscar conference paper (European Conference on Computer Vision, 2020).
% Springer is recorded as the publisher of the proceedings, not as a sponsoring
% organization. The model name in the title is brace-protected so that styles
% which apply sentence casing keep its capitalisation.
@inproceedings{li2020oscar,
  author    = {Li, Xiujun and Yin, Xi and Li, Chunyuan and Zhang, Pengchuan and Hu, Xiaowei and Zhang, Lei and Wang, Lijuan and Hu, Houdong and Dong, Li and Wei, Furu and others},
  title     = {{Oscar}: Object-Semantics Aligned Pre-training for Vision-Language Tasks},
  booktitle = {European Conference on Computer Vision},
  pages     = {121--137},
  year      = {2020},
  publisher = {Springer},
  url       = {https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123750120.pdf},
}